Amartya Kumar Sinha
Working document for Assignment 1 for International Climate Policy
January 18, 2024

CNETID: amartyaksinha

In [ ]:
import os
import kaleido
import pandas as pd
import plotly.io as pio
import plotly.express as px
import matplotlib.pyplot as plt
from scipy.stats import ttest_ind
from sklearn.metrics import r2_score
from sympy import symbols, Eq, solve
pio.renderers.default = "notebook+pdf"
from sklearn.linear_model import LinearRegression

path = r'C:/Users/amart/OneDrive - The University of Chicago/IntlClimatePolicy_PPHA39930/Assignments/Assn1_IntlClimatePolicy'

Q2. Australian climate data analysis¶

Q2. a)¶

In [ ]:
perth_df = pd.read_csv(os.path.join(path,
                                    'indiv1_perth_airport.csv'), engine='python')

perth_df['DATE'] = pd.to_datetime(perth_df['DATE'])
perth_df.set_index('DATE', inplace=True)

# Filter data from 1981 to 2010
filtered_df = perth_df.loc['1981':'2010']
# Calculate the average precipitation for each month
monthly_climatology = filtered_df.groupby(filtered_df.index.month)['PRCP'].mean()
# Print the average precipitation for each month
for month, avg_precipitation in monthly_climatology.items():
    print(f"Month: {month}, Average Precipitation: {avg_precipitation}")
# Find the rainiest month
rainiest_month = monthly_climatology.idxmax()
print(f"The rainiest month on average across 1981 to 2019 is month number {rainiest_month}")

# Create a figure and a set of subplots
fig, ax = plt.subplots()
# Plot the average precipitation for each month
ax.bar(monthly_climatology.index, monthly_climatology.values)
# Setting x and y-axis labels
ax.set_xlabel('Month')
ax.set_ylabel('Average Precipitation (mm)')

ax.set_title('Monthly Precipitation Climatology of Perth (1981-2010)')

# List of month names for x-tick labels
months = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
ax.set_xticks(range(1, 13))
ax.set_xticklabels(months, rotation=45)

# Showing values of y-axis by annotating each data point with its 
# corresponding value
for i, v in enumerate(monthly_climatology.values):
    ax.text(i+1, v + 0.5, str(round(v, 2)), ha='center')

plt.show()
Month: 1, Average Precipitation: 13.006666666666666
Month: 2, Average Precipitation: 17.560000000000002
Month: 3, Average Precipitation: 18.4
Month: 4, Average Precipitation: 35.10666666666667
Month: 5, Average Precipitation: 89.06666666666666
Month: 6, Average Precipitation: 139.72
Month: 7, Average Precipitation: 146.94
Month: 8, Average Precipitation: 114.53999999999999
Month: 9, Average Precipitation: 76.83999999999999
Month: 10, Average Precipitation: 35.70666666666667
Month: 11, Average Precipitation: 28.44
Month: 12, Average Precipitation: 7.78
The rainiest month on average across 1981 to 2019 is month number 7
No description has been provided for this image

Q2. b)¶

In [ ]:
# Filter data from 1944 onwards
filtered_df = perth_df.loc['1944':]

def plot_rainfall_trend(df, month, y_label, title):
    # Ensure month is a list-like object
    if not isinstance(month, (list, tuple)):
        month = [month]

    # Filter data for the specified month
    if len(month) == 1:
        monthly_rainfall = df[df.index.month == month[0]]['PRCP'].resample('Y').mean().reset_index()
    else:
        monthly_rainfall = df[df.index.month.isin(month)]['PRCP'].resample('Y').mean().reset_index()
    
    # Create a figure and set the size
    plt.figure(figsize=(10, 6))
    
    # Scatter plot for average monthly rainfall
    plt.scatter(x=monthly_rainfall['DATE'].dt.year, y=monthly_rainfall['PRCP'], color='skyblue', label='Average Monthly Rainfall')
    
    # Fit a linear trend line
    X = monthly_rainfall['DATE'].dt.year.values.reshape(-1, 1)
    y = monthly_rainfall['PRCP'].values
    model = LinearRegression().fit(X, y)
    trend_line = model.predict(X)
   
    # Calculate R-squared value
    r_squared = r2_score(y, trend_line)

    # Plot the linear trend line
    plt.plot(monthly_rainfall['DATE'].dt.year, trend_line, color='red', label=f'Linear Trend Line (R²={r_squared:.2f})')

    # Setting x and y-axis labels
    plt.xlabel('Year')
    plt.ylabel(y_label)
    
    # Set the title and show legend
    plt.title(title)
    plt.legend()
    
    # Show the plot
    plt.show()

plot_rainfall_trend(filtered_df, 7, 'July Rainfall (mm)', 'Average July Rainfall in Perth with Linear Trend Line (1944-2019)')

# Performing statistical test using two-sample t-test
earlier_period = perth_df.loc['1951-01-01':'1980-12-31']['PRCP']
later_period = perth_df.loc['1981-01-01':'2010-12-31']['PRCP']

t_statistic, p_value = ttest_ind(earlier_period, later_period)

print(f'Two-Sample T-Test Results:')
print(f'T-statistic: {t_statistic}')
print(f'P-value: {p_value}')

# Check significance at a 95% confidence interval
alpha = 0.05
if p_value < alpha:
    print(f'Difference between the two periods is statistically significant (reject H0).')
else:
    print(f'Difference between the two periods is not statistically significant (fail to reject H0).')
No description has been provided for this image
Two-Sample T-Test Results:
T-statistic: 1.3581011168763193
P-value: 0.17485819704852887
Difference between the two periods is not statistically significant (fail to reject H0).

Q2. c)¶

In [ ]:
plot_rainfall_trend(filtered_df, [5, 6, 7, 8], 'Winter Rainfall (mm)', 'Average Winter Rainfall in Perth with Linear Trend Line (1944-2019)')

# Filter data for winter months (May-August)
winter_rainfall = perth_df[perth_df.index.month.isin(range(5, 9))]['PRCP'].resample('Y').mean().reset_index()

# Performing statistical test using two-sample t-test for average winter rainfall trend
early_period = winter_rainfall[(winter_rainfall['DATE'].dt.year >= 1951) & (winter_rainfall['DATE'].dt.year <= 1980)]['PRCP']
later_period = winter_rainfall[(winter_rainfall['DATE'].dt.year >= 1981) & (winter_rainfall['DATE'].dt.year <= 2010)]['PRCP']

t_statistic, p_value = ttest_ind(early_period, later_period)

print(f'Two-Sample T-Test Results for Average Winter Rainfall Trend:')
print(f'T-statistic: {t_statistic}')
print(f'P-value: {p_value}')

# Check significance at a 95% confidence interval
alpha = 0.05
if p_value < alpha:
    print(f'Difference in average winter rainfall trend is statistically significant (reject H0).')
else:
    print(f'Difference in average winter rainfall trend is not statistically significant (fail to reject H0).')
No description has been provided for this image
Two-Sample T-Test Results for Average Winter Rainfall Trend:
T-statistic: 2.905853284992847
P-value: 0.005177858501204699
Difference in average winter rainfall trend is statistically significant (reject H0).

Q3. Climate change and inequality¶

In [ ]:
county_income = pd.read_csv(os.path.join(path, 
                                         'indiv1_us_counties_incomes.csv'), engine='python')
county_temp = pd.read_csv(os.path.join(path, 
                                       'indiv1_us_counties_temperature.csv'), engine='python')

display(county_income.shape)
display(county_income.head(10))

display(county_temp.shape)
display(county_temp.head(10))
(3112, 2)
fips income_per_capita_2018
0 1001 41618
1 1003 45596
2 1005 35199
3 1007 30254
4 1009 34976
5 1011 28797
6 1013 36450
7 1015 37120
8 1017 33859
9 1019 35505
(3112, 9)
fips county state lat lon normal_1981_2010 rcp85_2020_2039 rcp85_2040_2059 rcp85_2080_2099
0 1001 Autauga County AL 32.535000 -86.642998 18.361113 19.527779 20.516666 22.622223
1 1003 Baldwin County AL 30.736000 -87.723000 19.211113 20.288887 21.133333 23.144447
2 1005 Barbour County AL 31.870001 -85.392998 17.283333 18.438890 19.394447 21.477779
3 1007 Bibb County AL 32.999001 -87.125999 16.433334 17.633335 18.666666 20.805553
4 1009 Blount County AL 33.980999 -86.567001 15.961111 17.166668 18.255556 20.455555
5 1011 Bullock County AL 32.099998 -85.716003 17.283333 18.438890 19.394447 21.477779
6 1013 Butler County AL 31.752001 -86.680000 17.444445 18.588888 19.544445 21.611113
7 1015 Calhoun County AL 33.771000 -85.825996 16.894444 18.083336 19.172224 21.399998
8 1017 Chambers County AL 32.914001 -85.391998 16.505554 17.638889 18.677780 20.833334
9 1019 Cherokee County AL 34.175999 -85.603996 15.177777 16.355555 17.505554 19.811113

Q3. a)¶

In [ ]:
# Histogram based on 1981-2010 temperatures
plt.hist(county_temp['normal_1981_2010'], bins=50, alpha=0.5, label='1981-2010', edgecolor='black')

# Histogram based temperature estimates for 2080-2099 under RCP8.5 emissions
plt.hist(county_temp['rcp85_2080_2099'], bins=50, alpha=0.5, label='2080-2099 under RCP8.5', edgecolor='red')

plt.xlabel('Temperature (°C)')
plt.ylabel('Number of Counties')
plt.title('Histogram of County Temperatures')
plt.legend(loc='upper right')
plt.show()
No description has been provided for this image

Q3 b)¶

In [ ]:
# Creating a merged dataset from the temperature and income datasets 
county_merge = pd.merge(county_income, county_temp, on='fips')

# Calculating income deciles
county_merge['Income Decile'] = pd.qcut(county_merge['income_per_capita_2018'], 10, labels=False)

# Calculating average temperatures for each time period for these income deciles
average_temps = county_merge.groupby('Income Decile').agg({
    'normal_1981_2010': 'mean',
    'rcp85_2020_2039': 'mean',
    'rcp85_2040_2059': 'mean',
    'rcp85_2080_2099': 'mean'
})
average_temps = average_temps.round(3)
display(average_temps)
normal_1981_2010 rcp85_2020_2039 rcp85_2040_2059 rcp85_2080_2099
Income Decile
0 15.358 16.634 17.692 20.046
1 14.776 16.072 17.147 19.539
2 14.035 15.345 16.438 18.872
3 13.124 14.474 15.595 18.083
4 12.707 14.067 15.180 17.686
5 11.979 13.351 14.497 17.057
6 11.395 12.767 13.908 16.481
7 10.902 12.292 13.458 16.086
8 10.237 11.644 12.826 15.490
9 10.981 12.333 13.455 15.986

Q3. c)¶

In [ ]:
plt.figure()

# Plot average temperatures for both time periods
plt.plot(average_temps.index, average_temps['normal_1981_2010'], label='1981-2010')
plt.plot(average_temps.index, average_temps['rcp85_2080_2099'], label='2080-2099 (RCP8.5)')

plt.xlabel('Income Decile')
plt.ylabel('Average Temperature (°C)')
plt.title('Average Temperatures against Income Deciles')
plt.xticks(range(0, 10))
plt.legend()
plt.show()
No description has been provided for this image

Q3. e)¶

In [ ]:
# Calculating change in temperature
county_merge['temp_change'] = county_merge['rcp85_2080_2099'] - county_merge['normal_1981_2010']

# Calculating average temperature change for each income decile
average_temp_change = county_merge.groupby('Income Decile')['temp_change'].mean()

# Finding income decile with the most change
highest_change = average_temp_change.idxmax()
display(f"The income decile that will experience the most change is: {highest_change}")
'The income decile that will experience the most change is: 8'
In [ ]:
# Analysis of spatial pattern changes

spatial_df = pd.DataFrame({
    'lat': county_merge['lat'],
    'lon': county_merge['lon'],
    'temp_change': county_merge['temp_change'],
    'Income Decile': county_merge['Income Decile']
})

# Create a scatter plot on a map
fig = px.scatter_geo(spatial_df, lat='lat', lon='lon', color='temp_change',
                     projection='natural earth', 
                     title='Spatial pattern of temperature changes (RCP8.5)',
                     color_continuous_scale=px.colors.sequential.YlOrRd)
fig.update_geos(resolution=110, showcoastlines=True, coastlinecolor="Black",
                showocean=True, oceancolor="lightblue")
fig.update_layout(coloraxis_colorbar=dict(title="Temperature Change (°C)"))
fig.show()

fig = px.scatter_geo(spatial_df, lat='lat', lon='lon', color='Income Decile',
                     projection='natural earth', 
                     title='Spatial pattern of income deciles')
fig.update_geos(resolution=110, showcoastlines=True, coastlinecolor="Black",
                showocean=True, oceancolor="lightblue")
fig.show()
In [ ]:
filtered_df = spatial_df[spatial_df['Income Decile'] == 8]
display(filtered_df.describe())
display(spatial_df.describe())
lat lon temp_change Income Decile
count 311.000000 311.000000 311.000000 311.0
mean 41.422765 -94.289058 5.252269 8.0
std 5.196372 15.202604 0.623615 0.0
min 26.576000 -170.279010 3.255556 8.0
25% 38.767499 -99.708000 4.919444 8.0
50% 41.771999 -95.309998 5.322222 8.0
75% 44.309000 -84.686500 5.666666 8.0
max 65.017998 -68.345001 7.722222 8.0
lat lon temp_change Income Decile
count 3112.000000 3112.000000 3112.000000 3112.000000
mean 38.470611 -92.389624 4.983169 4.500000
std 5.328819 12.876852 0.594605 2.874085
min 19.598000 -170.279010 2.877775 0.000000
25% 34.659499 -98.347500 4.677780 2.000000
50% 38.417999 -90.495498 5.072222 4.500000
75% 41.852001 -83.602497 5.361110 7.000000
max 69.306000 -67.638000 7.722222 9.000000
In [ ]:
fig = px.scatter_geo(filtered_df, lat='lat', lon='lon', color='temp_change',
                     projection='natural earth', 
                     title='Spatial pattern of temperature changes for decile 8 counties (RCP8.5)',
                     color_continuous_scale=px.colors.sequential.YlOrRd)

fig.update_geos(resolution=110, showcoastlines=True, coastlinecolor="Black",
                showocean=True, oceancolor="lightblue")
fig.update_layout(coloraxis_colorbar=dict(title="Temperature Change (°C)"))

fig.show()

Q4. Climate change communication¶

Q4. a)¶

In [ ]:
ts_data = pd.read_csv(os.path.join(path, 
                                   'timeseries_data.csv'), engine='python', skiprows=4)
In [ ]:
# Converting year to proper date-time format for plotting
ts_data['Year'] = ts_data['Year'].astype(str)
ts_data['Year'] = pd.to_datetime(ts_data['Year'].str.slice(0,4) + '-' + ts_data['Year'].str.slice(4,6))
In [ ]:
plt.figure(figsize=(15, 7))
plt.plot(ts_data['Year'], ts_data['Anomaly'])
plt.xlabel('Year')
plt.ylabel('Temperature Anomaly (°C)')
plt.title('Global Land and Ocean Temperature Anomalies Over Time (base period: 1901-2000)')
plt.xlim([pd.Timestamp('1850-01-01'), pd.Timestamp('2023-12-31')])
Out[ ]:
(-43829.0, 19722.0)
No description has been provided for this image